countMatrix <- ReadDataFrameFromTsv(file.name.path="../data/refSEQ_countMatrix.txt")
## ../data/refSEQ_countMatrix.txt read from disk!
# head(countMatrix)
designMatrix <- ReadDataFrameFromTsv(file.name.path="../design/all_samples_short_names.tsv.csv")
## ../design/all_samples_short_names.tsv.csv read from disk!
# head(designMatrix)
rownames <- as.integer(rownames(countMatrix))
rownames <- rownames[order(rownames)]
rownames.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames, c("external_gene_name",
"mgi_symbol", "entrezgene"))
noNaCountMatrix <- attachGeneColumnToDf(mainDf=countMatrix,
genesMap=rownames.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
filteredCountsProp <- filterLowCounts(counts.dataframe=noNaCountMatrix,
is.normalized=FALSE,
design.dataframe=designMatrix,
cond.col.name="gcondition",
method.type="Proportion")
## features dimensions before normalization: 20730
## Filtering out low count features...
## 13580 features are to be kept for differential expression analysis with filtering method 3
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp,
norm.type="uqua",
design.matrix=designMatrix)
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
Estimating Negative Control Genes to normalize data
#### estimating neg controls
neg.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/full_SD_Neg_Control_genes_BMC_genomics.csv", row.names.col=NULL)
head(neg.ctrls)
## Ensembl.ID Mouse.Gene.2.1.ST.probeset.ID
## 1 ENSMUSG00000026544 17229948
## 2 ENSMUSG00000034071 17486529
## 3 ENSMUSG00000106547 17455231
## 4 ENSMUSG00000041057 17339476
## 5 ENSMUSG00000051788 17401386
## 6 ENSMUSG00000045267 17310835
## Mouse.Genome.430.2.0.probeset.ID MGI.Symbol logFC
## 1 1453074_at Dusp23 -1,41369277830705E-05
## 2 1437705_at Zfp551 -1,81377838872621E-05
## 3 1456750_at B230303O12Rik 2,86545700491914E-05
## 4 1428390_at Wdr43 4,57864397187535E-05
## 5 1431660_at 4930564D02Rik 7,335759288285E-05
## 6 1450585_at Tas2r119 -0,000114023
## AveExpr t P.Value adj.P.Val B
## 1 5,9439547687 -0,0003193989 0,9997464192 0,9998325291 -6,6151570846
## 2 5,9352982951 -0,0002109388 0,9998325291 0,9998325291 -6,6151571134
## 3 4,4201319911 0,0005493522 0,9995638521 0,9997997374 -6,6151569846
## 4 7,9698547438 0,0010387414 0,9991753108 0,9995290429 -6,6151565952
## 5 4,6481138989 0,0010651912 0,9991543115 0,9995290429 -6,6151565673
## 6 4,0937064457 -0,0015836005 0,9987427306 0,9994135667 -6,6151558794
neg.ctrls <- neg.ctrls$MGI.Symbol
neg.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=neg.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
neg.map.nna <- neg.map[-which(is.na(neg.map$entrezgene)),]
# neg.map.nna <- neg.map.nna[which(neg.map.nna$entrezgene %in% rownames(normPropCountsUqua)),]
#
neg.ctrls.entrez <- neg.map.nna$entrezgene
ind.ctrls <- which(rownames(normPropCountsUqua) %in% neg.ctrls.entrez)
counts.neg.ctrls <- normPropCountsUqua[ind.ctrls,]
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
library(RUVSeq)
#groups <- makeGroups(designMatrix$classic)[1,,drop=FALSE]
neg.ctrl.list <- as.character(neg.map.nna$entrezgene[which(neg.map.nna$entrezgene %in% rownames(normPropCountsUqua))])
groups <- makeGroups(paste0(designMatrix$genotype, designMatrix$classic))[c(1, 3),]
ruvedSExprData <- RUVs(as.matrix(round(normPropCountsUqua)), cIdx=neg.ctrl.list,
scIdx=groups, k=5)
normExprData <- ruvedSExprData$normalizedCounts
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
pal <- RColorBrewer::brewer.pal(9, "Set1")
plotRLE(normExprData, outline=FALSE, col=pal[designMatrix$gcondition])
### edgering
desMat <- cbind(designMatrix, ruvedSExprData$W)
colnames(desMat) <- c(colnames(designMatrix), colnames(ruvedSExprData$W))
cc <- c("WTSD5 - WTHC5", "WTRS2 - WTHC7")
rescList1 <- applyEdgeR(counts=normExprData, design.matrix=desMat,
factors.column="gcondition",
weight.columns=c("W_1", "W_2", "W_3", "W_4"),
contrasts=cc, useIntercept=FALSE, p.threshold=1,
verbose=TRUE)
PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[1])
sd.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/SD_full_pos_control_genes_BMC_genomics.csv")
sd.pos.ctrls <- sd.pos.ctrls$MGI.Symbol
sd.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=sd.pos.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
sd.pos.map.nna <- sd.pos.map[-which(is.na(sd.pos.map$entrezgene)),]
sd.pos.genes.entrez <- sd.pos.map.nna$entrezgene
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[1]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05,
positive.ctrls.list=sd.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[2])
rs2.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/RS2_pos_control_genes_BMC_genomics.csv")
rs2.pos.ctrls <- rs2.pos.ctrls$MGI.Symbol
rs2.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=rs2.pos.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
rs2.pos.map.nna <- rs2.pos.map[-which(is.na(rs2.pos.map$entrezgene)),]
rs2.pos.genes.entrez <- rs2.pos.map.nna$entrezgene
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[2]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05,
positive.ctrls.list=rs2.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)